library(survival)
# Load the study data set.
sbl <- read.csv('badware.csv', header = TRUE)

# Sites that remain infected at the end of the study period
# (fullyCleanCensor == 0) have no cleanup day; record day 16 for them so they
# enter the survival fits as right-censored observations.
sbl$dayFullyClean[sbl$fullyCleanCensor == 0] <- 16

# Columns that flag a Google Safe Browsing blacklist hit.
# domainblockedgoogle means that on some day during the study Google was
# blocking the domain but not the FQDN specifically. It also covers IP
# addresses: looking an IP address up in Google Safe Browsing shows nothing
# even when the IP is on Google's blacklist.
# NOTE(review): 'goobad2' is spelled without the second 'g', unlike its
# siblings. It is kept as-is because it must match the CSV header -- confirm.
gblCols <- c('googbad0', 'googbad1', 'goobad2', 'googbad4', 'googbad8',
             'googbad16', 'domainblockedgoogle')
missingGblCols <- setdiff(gblCols, names(sbl))
if (length(missingGblCols) > 0) {
  # Fail with a readable message instead of a zero-length replacement error.
  stop('badware.csv is missing expected column(s): ',
       paste(missingGblCols, collapse = ', '), call. = FALSE)
}

# TRUE where at least one flag is 1 / where every flag is 0. Both masks are
# built from the single column list above so they cannot drift apart.
onAnyGoogleList <- Reduce(`|`, lapply(gblCols, function(col) sbl[[col]] == 1))
onNoGoogleList <- Reduce(`&`, lapply(gblCols, function(col) sbl[[col]] == 0))

sbl$onGBL <- onAnyGoogleList
sbl$poolf <- factor(sbl$pool)
sbl$onGBLf <- factor(sbl$onGBL)
sbl$malf <- factor(sbl$mal)
sbl$isExecutablef <- factor(sbl$isExecutable)

# Row subsets, taken after the derived columns exist so both carry them.
noGoogle <- sbl[onNoGoogleList, ]
hasGoogle <- sbl[onAnyGoogleList, ]

# Figure 3: survival curves for time to permanent cleanup, one panel per data
# set (all sites, sites not on the Google blacklist, sites on it). Each panel
# shows pool 0 in black (control), pool 1 in red (minimal notice) and pool 2
# in blue (detailed notice), as labelled in the legend.

# Fit the right-censored cleanup-time curve for one pool of `sites`.
fig3Fit <- function(sites, poolId) {
  inPool <- sites$pool == poolId
  survfit(Surv(time = sites$dayFullyClean[inPool],
               event = sites$fullyCleanCensor[inPool],
               type = 'right') ~ 1)
}

# Draw one panel: pool 0 opens the plot, pools 1 and 2 are overlaid.
# Extra arguments (e.g. ylab) are forwarded to the opening plot() call.
fig3Panel <- function(sites, panelTitle, ...) {
  plot(fig3Fit(sites, 0), lwd = 2, main = panelTitle,
       xlab = 'days to cleanup', cex.main = 1.5, cex.lab = 1.25, ...)
  lines(fig3Fit(sites, 1), col = 'red', lwd = 2, lty = 'dotdash')
  lines(fig3Fit(sites, 2), col = 'blue', lwd = 2, lty = 'dotted')
  invisible(NULL)
}

# Only the first panel carries a y-axis label.
fig3Panel(sbl,
          'Survival fn for time to permanent cleanup\n(all sites)',
          ylab = 'fraction of websites still infected after x days')
fig3Panel(noGoogle,
          'Survival fn for time to permanent cleanup\n(sites not in Google Safe Browsing blacklist)')
fig3Panel(hasGoogle,
          'Survival fn for time to permanent cleanup\n (sites also in Google Safe Browsing blacklist)')

# The legend is drawn once, on the last panel.
legend(x = 'bottomright',
       legend = c('Control (no notice)', 'Control (95% CI)',
                  'Minimal notice', 'Detailed notice'),
       col = c('black', 'black', 'red', 'blue'),
       lty = c('solid', 'dashed', 'dotdash', 'dotted'),
       lwd = 2, cex = 1.5)

# Figure 4: detailed-notice sites (pool == 2) within noGoogle, split by the
# mal flag. mal == 0 is drawn in green ("compromised") and mal == 1 in orange
# ("malicious"), as labelled in the legend.
fig4Compromised <- with(noGoogle, mal == 0 & pool == 2)
fig4Malicious <- with(noGoogle, mal == 1 & pool == 2)

plot(
  survfit(Surv(time = noGoogle$dayFullyClean[fig4Compromised],
               event = noGoogle$fullyCleanCensor[fig4Compromised],
               type = 'right') ~ 1),
  col = 'green', lwd = 2,
  main = 'Survival fn for full notices, compromised vs. purely malicious',
  xlab = 'days to cleanup',
  ylab = 'fraction of websites still infected after x days',
  cex.main = 1.5, cex.lab = 1.25
)
lines(
  survfit(Surv(time = noGoogle$dayFullyClean[fig4Malicious],
               event = noGoogle$fullyCleanCensor[fig4Malicious],
               type = 'right') ~ 1),
  col = 'orange', lwd = 2, lty = 'longdash'
)
legend(x = 'bottomright',
       legend = c('Detailed notice, compromised site',
                  'Detailed notice, compromised (95% CI)',
                  'Detailed notice, malicious site'),
       col = c('green', 'green', 'orange'),
       lty = c('solid', 'dashed', 'longdash'),
       lwd = 2, cex = 1.5)

# Figure 5: detailed-notice sites (pool == 2) within noGoogle, split by the
# isExecutable flag. isExecutable == 0 is drawn in brown and isExecutable == 1
# in cyan, as labelled in the legend.
plot(
  survfit(Surv(time = dayFullyClean[isExecutable == 0 & pool == 2],
               event = fullyCleanCensor[isExecutable == 0 & pool == 2],
               type = 'right') ~ 1,
          data = noGoogle),
  lwd = 2, col = 'brown',
  main = 'Survival fn for full notices, executables vs. other',
  cex.main = 1.5, cex.lab = 1.25,
  ylab = 'fraction of websites still infected after x days',
  xlab = 'days to cleanup'
)
lines(
  survfit(Surv(time = dayFullyClean[isExecutable == 1 & pool == 2],
               event = fullyCleanCensor[isExecutable == 1 & pool == 2],
               type = 'right') ~ 1,
          data = noGoogle),
  lwd = 2, col = 'cyan', lty = 'twodash'
)
# Exactly one line type per legend entry. The previous four-element vector
# ('solid','dashed','longdash','twodash') was cut to its first three values,
# so the "executable" entry was drawn 'longdash' although its curve above is
# drawn 'twodash'.
legend(x = 'bottomright',
       legend = c('Detailed notice, not executable',
                  'Detailed notice, not executable (95% CI)',
                  'Detailed notice, executable'),
       col = c('brown', 'brown', 'cyan'),
       lty = c('solid', 'dashed', 'twodash'),
       lwd = 2, cex = 1.5)



# Overall per-pool summary over every site in sbl: row count, percentage with
# fullyCleanCensor == 1, and median cleanup day among those cleaned sites.
ct <- table(poolf = sbl$poolf, fullyCleanCensor = sbl$fullyCleanCensor)
# Row-wise share of the second outcome column of the cross-tabulation.
fracClean <- prop.table(ct, margin = 1)[, 2]
cleanedSites <- sbl[sbl$fullyCleanCensor == 1, ]
bigSum <- cbind(
  num = summary(sbl$poolf),
  fracClean = 100 * fracClean,
  #meanDays2Clean=tapply(cleanedSites$dayFullyClean,cleanedSites$poolf,mean,na.rm=T),
  medianDays2Clean = with(cleanedSites,
                          tapply(dayFullyClean, poolf, median, na.rm = TRUE))
)

# Summarise cleanup outcomes per notification pool for one slice of the data.
#
# Args:
#   df:         data frame with the columns poolf (factor), fullyCleanCensor,
#               dayFullyClean and the column named by `param`.
#   param:      name (string) of the column to filter on.
#   paramvalue: rows whose `param` column equals this value are kept.
#
# Returns:
#   A matrix with one row per level of df$poolf and the columns
#     num              - number of kept rows in that pool,
#     fracClean        - percentage of rows with fullyCleanCensor == 1 among
#                        the kept rows whose fullyCleanCensor is not NA,
#     medianDays2Clean - median dayFullyClean over the kept rows with
#                        fullyCleanCensor == 1 (NA when a pool has none).
makeSummary <- function(df, param, paramvalue) {
  # which() drops rows where the comparison is NA. Plain logical indexing
  # would turn each of them into an all-NA phantom row, which adds an "NA's"
  # entry to `num` and breaks the cbind() below.
  dfsub <- df[which(df[[param]] == paramvalue), ]

  isClean <- dfsub$fullyCleanCensor == 1
  # Fix both outcome levels so the 'TRUE' column exists even when the subset
  # holds only cleaned or only uncleaned sites (positional ct[, 2] failed
  # with "subscript out of bounds" in that case).
  ct <- table(dfsub$poolf, factor(isClean, levels = c(FALSE, TRUE)))
  fracClean <- ct[, 'TRUE'] / rowSums(ct)

  cleanedSites <- dfsub[which(isClean), ]
  cbind(num = summary(dfsub$poolf),
        fracClean = 100 * fracClean,
        #meanDays2Clean=tapply(cleanedSites$dayFullyClean,cleanedSites$poolf,mean,na.rm=T),
        medianDays2Clean = tapply(cleanedSites$dayFullyClean,
                                  cleanedSites$poolf, median, na.rm = TRUE))
}

# Table 1: the overall summary in bigSum, followed by the per-pool summaries
# for mal TRUE / FALSE and isExecutable TRUE / FALSE, shown transposed.
bigSum <- do.call(cbind, c(
  list(bigSum),
  lapply(c('mal', 'isExecutable'), function(flagCol) {
    cbind(makeSummary(sbl, flagCol, TRUE), makeSummary(sbl, flagCol, FALSE))
  })
))
t(bigSum)

# Table 2: per-pool summaries for sites with onGBL FALSE, then onGBL TRUE,
# shown transposed.
googleSum <- do.call(cbind, lapply(c(FALSE, TRUE), function(listed) {
  makeSummary(sbl, 'onGBL', listed)
}))
t(googleSum)
